# tclparser.tcl --
#
#       This file provides a Tcl implementation of a XML parser.
#       This file supports Tcl 8.1.
#
#       See xml-8.[01].tcl for definitions of character sets and
#       regular expressions.
#
# Copyright (c) 1998-2002 Zveno Pty Ltd
# http://www.zveno.com/
#
# Zveno makes this software and all associated data and documentation
# ('Software') available free of charge for any purpose.
# Copies may be made of this Software but all of this notice must be included
# on any copy.
#
# The Software was developed for research purposes and Zveno does not warrant
# that it is error free or fit for any purpose.  Zveno disclaims any
# liability for all claims, expenses, losses, damages and costs any user may
# incur as a result of using, copying or modifying the Software.
#
# Copyright (c) 1997 Australian National University (ANU).
#
# ANU makes this software and all associated data and documentation
# ('Software') available free of charge for any purpose. You may make copies
# of the Software but you must include all of this notice on any copy.
#
# The Software was developed for research purposes and ANU does not warrant
# that it is error free or fit for any purpose.  ANU disclaims any
# liability for all claims, expenses, losses, damages and costs any user may
# incur as a result of using, copying or modifying the Software.
#
# $Id: tclparser.tcl 57 2008-11-02 13:57:58Z sgolovan $

package require Tcl 8.1

package provide xml::tclparser 2.0

package require -exact xmldefs 2.0

package require -exact sgmlparser 1.0

namespace eval xml::tclparser {

    namespace export create createexternal externalentity parse \
                     configure get delete

    # Tokenising expressions

    variable tokExpr1 $::xml::tokExpr1
    variable tokExpr2 $::xml::tokExpr2
    variable tokExpr3 $::xml::tokExpr3
    variable substExpr $::xml::substExpr

    # Register this parser class

    ::xml::parserclass create tcl \
            -createcommand [namespace code create] \
            -createentityparsercommand [namespace code createentityparser] \
            -parsecommand [namespace code parse] \
            -configurecommand [namespace code configure] \
            -deletecommand [namespace code delete]
}

# xml::tclparser::create --
#
#       Creates XML parser object.
#
# Arguments:
#       name        unique identifier for this instance
#
# Results:
#       The state variable is initialised.

proc xml::tclparser::create name {

    # Initialise state variable
    upvar \#0 [namespace current]::$name parser
    array set parser [list \
        -name $name                                                      \
        -final 1                                                         \
        -namespace 1                                                     \
        -validate 0                                                      \
        -statevariable [namespace current]::$name                        \
        -baseurl {}                                                      \
        internaldtd {}                                                   \
        entities [namespace current]::Entities$name                      \
        extentities [namespace current]::ExtEntities$name                \
        parameterentities [namespace current]::PEntities$name            \
        externalparameterentities [namespace current]::ExtPEntities$name \
        elementdecls [namespace current]::ElDecls$name                   \
        attlistdecls [namespace current]::AttlistDecls$name              \
        notationdecls [namespace current]::NotDecls$name                 \
        depth 0                                                          \
        leftover {}                                                      \
    ]

    # Initialise entities with predefined set
    array set [namespace current]::Entities$name [array get ::sgml::EntityPredef]

    return $name
}

# xml::tclparser::createentityparser --
#
#       Creates XML parser object for an entity.
#
# Arguments:
#       name        name for the new parser
#       parent      name of parent parser
#
# Results:
#       The state variable is initialised.

proc xml::tclparser::createentityparser {parent name} {
    upvar #0 [namespace current]::$parent p

    # Initialise state variable
    upvar \#0 [namespace current]::$name external
    array set external [array get p]

    array set external [list \
        -name $name                               \
        -statevariable [namespace current]::$name \
        internaldtd {}                            \
        line 0                                    \
    ]
    incr external(depth)

    return $name
}

# xml::tclparser::configure --
#
#       Configures a XML parser object.
#
# Arguments:
#       name        unique identifier for this instance
#       args        option name/value pairs
#
# Results:
#       May change values of config options

proc xml::tclparser::configure {name args} {
    upvar \#0 [namespace current]::$name parser

    # BUG: very crude, no checks for illegal args
    # Mats: Should be synced with sgmlparser.tcl
    set options {
        -elementstartcommand -elementendcommand \
        -characterdatacommand -processinginstructioncommand \
        -externalentitycommand -xmldeclcommand \
        -doctypecommand -commentcommand \
        -entitydeclcommand -unparsedentitydeclcommand \
        -parameterentitydeclcommand -notationdeclcommand \
        -elementdeclcommand -attlistdeclcommand \
        -paramentityparsing -defaultexpandinternalentities \
        -startdoctypedeclcommand -enddoctypedeclcommand \
        -entityreferencecommand -warningcommand \
        -errorcommand -final -namespace \
        -validate -baseurl \
        -name -emptyelement \
        -parseattributelistcommand -parseentitydeclcommand \
        -normalize -internaldtd \
        -reportempty -ignorewhitespace \
        -reportempty \
    }
    set usage [join $options ", "]
    regsub -all -- - $options {} options
    set pat ^-([join $options |])$
    foreach {flag value} $args {
        if {[regexp $pat $flag]} {
            # Validate numbers
            if {[info exists parser($flag)] && \
                    [string is integer -strict $parser($flag)] && \
                    ![string is integer -strict $value]} {
                return -code error "Bad value for $flag ($value), must be integer"
            }
            set parser($flag) $value
        } else {
            return -code error "Unknown option $flag, can be: $usage"
        }
    }

    return {}
}

# xml::tclparser::parse --
#
#       Parses document instance data
#
# Arguments:
#       name        parser object
#       xml         data
#       args        configuration options
#
# Results:
#       Callbacks are invoked

proc xml::tclparser::parse {name xml args} {

    array set options $args
    upvar \#0 [namespace current]::$name parser
    variable tokExpr1
    variable tokExpr2
    variable tokExpr3
    variable substExpr

    # Mats:
    if {[llength $args]} {
        eval {configure $name} $args
    }

    set parseOptions [list \
            -emptyelement [namespace code ParseEmpty] \
            -parseattributelistcommand [namespace code ParseAttrs] \
            -parseentitydeclcommand [namespace code ParseEntity] \
            -normalize 0]
    eval lappend parseOptions \
            [array get parser -*command] \
            [array get parser -reportempty] \
            [array get parser -name] \
            [array get parser -baseurl] \
            [array get parser -validate] \
            [array get parser -namespace] \
            [array get parser -final] \
            [array get parser -defaultexpandinternalentities] \
            [array get parser entities] \
            [array get parser extentities] \
            [array get parser parameterentities] \
            [array get parser externalparameterentities] \
            [array get parser elementdecls] \
            [array get parser attlistdecls] \
            [array get parser notationdecls]

    # Mats:
    # If -final 0 we also need to maintain the state with a -statevariable !
    if {!$parser(-final)} {
        eval lappend parseOptions [array get parser -statevariable]
    }

    set dtdsubset no
    catch {set dtdsubset $options(-dtdsubset)}
    switch -- $dtdsubset {
        internal {
            # Bypass normal parsing
            lappend parseOptions -statevariable $parser(-statevariable)
            array set intOptions [array get ::sgml::StdOptions]
            array set intOptions $parseOptions
            ::sgml::ParseDTD:Internal [array get intOptions] $xml
            return {}
        }
        external {
            # Bypass normal parsing
            lappend parseOptions -statevariable $parser(-statevariable)
            array set intOptions [array get ::sgml::StdOptions]
            array set intOptions $parseOptions
            ::sgml::ParseDTD:External [array get intOptions] $xml
            return {}
        }
        default {
            # Pass through to normal processing
        }
    }

    lappend tokenOptions  \
            -internaldtdvariable [namespace current]::${name}(internaldtd)

    # Mats: If -final 0 we also need to maintain the state with a -statevariable !
    if {!$parser(-final)} {
        eval lappend tokenOptions [array get parser -statevariable] \
          [array get parser -final]
    }

    # Mats:
    # Why not the first four? Just padding? Lrange undos \n interp.
    # It is necessary to have the first four as well if chopped off in
    # middle of pcdata.
    set tokenised [lrange \
            [eval {::sgml::tokenise $xml $tokExpr1 $tokExpr2 $tokExpr3 $substExpr} $tokenOptions] \
        0 end]

    lappend parseOptions -internaldtd [list $parser(internaldtd)]
    eval ::sgml::parseEvent [list $tokenised] $parseOptions

    return {}
}

# xml::tclparser::ParseEmpty --  Tcl 8.1+ version
#
#       Used by parser to determine whether an element is empty.
#       This is usually dead easy in XML, but as always not quite.
#       Have to watch out for empty element syntax
#
# Arguments:
#       tag         element name
#       attr        attribute list (raw)
#       e           End tag delimiter.
#
# Results:
#       Return value of e

proc xml::tclparser::ParseEmpty {tag attr e} {
    switch -glob -- [string length $e],[regexp "/[::xml::cl $::xml::Wsp]*$" $attr] {
        0,0 {
            return {}
        }
        0,* {
            return /
        }
        default {
            return $e
        }
    }
}

# xml::tclparser::ParseAttrs -- Tcl 8.1+ version
#
#       Parse element attributes.
#
# There are two forms for name-value pairs:
#
#       name="value"
#       name='value'
#
# Arguments:
#       opts        parser options
#       attrs       attribute string given in a tag
#
# Results:
#       Returns a Tcl list representing the name-value pairs in the
#       attribute string
#
#       A ">" occurring in the attribute list causes problems when parsing
#       the XML.  This manifests itself by an unterminated attribute value
#       and a ">" appearing the element text.
#       In this case return a three element list;
#       the message "unterminated attribute value", the attribute list it
#       did manage to parse and the remainder of the attribute list.

proc xml::tclparser::ParseAttrs {opts attrs} {

    set result {}

    while {[string length [string trim $attrs]]} {
        if {[regexp ($::xml::Name)[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*(\"|')([::sgml::cl ^<]*?)\\2(.*) \
                    $attrs -> attrName delimiter value attrs]} {
            lappend result $attrName [NormalizeAttValue $opts $value]
        } elseif {[regexp $::xml::Name[::sgml::cl $::xml::Wsp]*=[::sgml::cl $::xml::Wsp]*(\"|')[::sgml::cl ^<]*\$ \
                          $attrs]} {
            return -code error [list {unterminated attribute value} $result $attrs]
        } else {
            return -code error "invalid attribute list"
        }
    }

    return $result
}

# xml::tclparser::NormalizeAttValue --
#
#       Perform attribute value normalisation.  This involves:
#       . character references are appended to the value
#       . entity references are recursively processed and replacement value appended
#       . whitespace characters cause a space to be appended
#       . other characters appended as-is
#
# Arguments:
#       opts        parser options
#       value       unparsed attribute value
#
# Results:
#       Normalised value returned.

proc xml::tclparser::NormalizeAttValue {opts value} {

    # sgmlparser already has backslashes protected
    # Protect Tcl specials
    regsub -all {([][$])} $value {\\\1} value

    # Deal with white space
    regsub -all "\[$::xml::Wsp\]" $value { } value

    # Find entity refs
    regsub -all {&([^;]+);} $value {[NormalizeAttValue:DeRef $opts {\1}]} value

    return [subst $value]
}

# xml::tclparser::NormalizeAttValue:DeRef --
#
#       Handler to normalize attribute values
#
# Arguments:
#       opts        parser options
#       ref         entity reference
#
# Results:
#       Returns character

proc xml::tclparser::NormalizeAttValue:DeRef {opts ref} {

    switch -glob -- $ref {
        #x* {
            scan [string range $ref 2 end] %x value
            set char [format %c $value]
            # Check that the char is legal for XML
            if {[regexp [format {^[%s]$} $::xml::Char] $char]} {
                return $char
            } else {
                return -code error "illegal character"
            }
        }
        #* {
            scan [string range $ref 1 end] %d value
            set char [format %c $value]
            # Check that the char is legal for XML
            if {[regexp [format {^[%s]$} $::xml::Char] $char]} {
                return $char
            } else {
                return -code error "illegal character"
            }
        }
        lt -
        gt -
        amp -
        quot -
        apos {
            array set map {lt < gt > amp & quot \" apos '}
            return $map($ref)
        }
        default {
            # A general entity.  Must resolve to a text value - no element structure.

            array set options $opts
            upvar #0 $options(entities) map

            if {[info exists map($ref)]} {

                if {[regexp < $map($ref)]} {
                    return -code error "illegal character \"<\" in attribute value"
                }

                if {![regexp & $map($ref)]} {
                    # Simple text replacement
                    return $map($ref)
                }

                # There are entity references in the replacement text.
                # Can't use child entity parser since must catch element structures

                return [NormalizeAttValue $opts $map($ref)]

            } elseif {[string compare $options(-entityreferencecommand) "::sgml::noop"]} {

                set result [uplevel #0 $options(-entityreferencecommand) [list $ref]]

                return $result

            } else {
                return -code error "unable to resolve entity reference \"$ref\""
            }
        }
    }
}

# xml::tclparser::ParseEntity --
#
#       Parse general entity declaration
#
# Arguments:
#       data        text to parse
#
# Results:
#       Tcl list containing entity declaration

proc xml::tclparser::ParseEntity data {
    set data [string trim $data]
    if {[regexp $::sgml::ExternalEntityExpr $data \
                -> type delimiter1 id1 discard delimiter2 id2 optNDATA ndata]} {
        switch -- $type {
            PUBLIC {
                return [list external $id2 $id1 $ndata]
            }
            SYSTEM {
                return [list external $id1 {} $ndata]
            }
        }
    } elseif {[regexp {^(\"|')(.*?)\1$} $data discard delimiter value]} {
        return [list internal $value]
    } else {
        return -code error "badly formed entity declaration"
    }
}

# xml::tclparser::delete --
#
#       Destroy parser data
#
# Arguments:
#       name        parser object
#
# Results:
#       Parser data structure destroyed

proc xml::tclparser::delete name {
    upvar \#0 [namespace current]::$name parser
    catch {::sgml::ParserDelete $parser(-statevariable)}
    catch {unset parser}
    return {}
}

# xml::tclparser::get --
#
#       Retrieve additional information from the parser
#
# Arguments:
#       name        parser object
#       method      info to retrieve
#       args        additional arguments for method
#
# Results:
#       Depends on method

proc xml::tclparser::get {name method args} {
    upvar #0 [namespace current]::$name parser

    switch -- $method {

        elementdecl {
            switch -- [llength $args] {

                0 {
                    # Return all element declarations
                    upvar #0 $parser(elementdecls) elements
                    return [array get elements]
                }

                1 {
                    # Return specific element declaration
                    upvar #0 $parser(elementdecls) elements
                    if {[info exists elements([lindex $args 0])]} {
                        return [array get elements [lindex $args 0]]
                    } else {
                        return -code error "element \"[lindex $args 0]\" not\
                                            declared"
                    }
                }

                default {
                    return -code error "wrong number of arguments: should be\
                                        \"elementdecl ?element?\""
                }
            }
        }

        attlist {
            if {[llength $args] != 1} {
                return -code error "wrong number of arguments: should be\
                                    \"get attlist element\""
            }

            upvar #0 $parser(attlistdecls)

            return {}
        }

        entitydecl {
        }

        parameterentitydecl {
        }

        notationdecl {
        }

        default {
            return -code error "unknown method \"$method\""
        }
    }

    return {}
}

# xml::tclparser::ExternalEntity --
#
#       Resolve and parse external entity
#
# Arguments:
#       name        parser object
#       base        base URL
#       sys         system identifier
#       pub         public identifier
#
# Results:
#       External entity is fetched and parsed

proc xml::tclparser::ExternalEntity {name base sys pub} {}

# vim:ft=tcl:ts=8:sw=4:sts=4:et
